Set up

library(data.table)
library(tidyverse)
library(smacof)
library(DESeq2)
library(InteractionSet)
library(diffHic)
library(broom)
library(highcharter)
library(heatmaply)
library(plotly)
library(eulerr)
library(reactable)
library(knitr)
# ("data.table", "tidyverse", "smacof", "DESeq2", "InteractionSet", "diffHic",
#       "broom", "highcharter", "heatmaply", "plotly", "eulerr")
# set your working directory
setwd("~/Documents/HGEN_663/extra/lec12")

Import

load('lec12.RData')

Contact matrices

There’s a number of ways through which we can examine the correlation matrix

Hierarchical clustering

tibble(s = names(hr)) %>%
  mutate(type = factor(sub('_.*', '', s))) %>%
  column_to_rownames('s') %>%
  heatmaply(hr, row_side_colors = ., col_side_colors = .,
            label_names = c("Sample 1", "Sample 2", "SCC"))

Multidimensional scaling

sim2diss(hr, method = 'corr') %>%
  mds(ndim = 2) %>%
  .$conf %>%
  as.data.frame() %>%
  rownames_to_column('s') %>%
  mutate(type = sub('_.*', '', s)) %>%
  plot_ly(x = ~D1, y = ~D2, color = ~type, text = ~s) %>%
  add_markers(legendgroup = ~type) %>%
  add_text(textposition = 'top left', showlegend = F, legendgroup = ~type)

P(s)

For the contact probability decay curves and their derivatives that we’ve computed before, we can again just simply visualize them

# colors for each cell type
hclrs <- c(ESC = "#7cb5ec", NPC = "#434348")

# plot data
pd <- ps %>%
  lapply(function(x) {
    d <- rbindlist(x, idcol = 'samp')
    ifelse('slope' %in% names(d), 'slope', 'balanced.avg') %>%
      c('samp', 's_bp', .) %>%
      d[, ., with = F] %>%
      `colnames<-`(c('samp', 'x', 'y'))
  }) %>%
  rbindlist(idcol = 'kind') %>%
  group_by(kind, samp) %>%
  do(data = list_parse2(data.frame(.$x, .$y))) %>%
  ungroup() %>%
  separate(samp, c('ctype', 'rep'), '_', F, fill = 'right') %>%
  #dplyr::filter(is.na(rep)) %>%
  mutate(color = hclrs[ctype],
         ctype = factor(ctype, names(hclrs))) %>%
  arrange(ctype) %>%
  mutate(name = samp,
         id = ifelse(kind != 'log', tolower(name), NA),
         linkedTo = ifelse(kind == 'log', tolower(name), NA),
         yAxis = ifelse(kind == 'log', 0, 2))

highchart() %>%
  # we use log scale for P(s) and linear for the slope (which was already taken in log space)
  hc_yAxis_multiples(list(title = list(text = "<b>Contact probability</b>, P<sub>c</sub>(s)",
                                       useHTML = TRUE),
                          type = "logarithmic",
                          labels = list(formatter = JS("function(){return this.value.toExponential(0);}")),
                          height = '45%', top = '0%', offset = 0),
                     list(height = '10%', top = '45%',
                          title = NULL,
                          plotLines = list(
                            list(color = "#a9a9a9", width = 2,
                                 value = .5, zIndex = 1)
                          ),
                          labels = list(enabled = F),
                          gridLineWidth = 0,
                          min = 0, max = 1),
                     list(type = "linear",
                          title = list(
                            text = "<b>Slope</b>, <sup>d</sup>&frasl;<sub>ds</sub> log P<sub>c</sub>(s)",
                            useHTML = TRUE
                          ),
                          height = '45%', top = '55%', offset = 0)) %>%
  # grab the data
  hc_add_series_list(pd) %>%
  # a bit of formatting
  hc_xAxis(type = "logarithmic",
           title = list(text = "<b>Genomic separation</b> (bp), s",
                        useHTML = T),
           minorTickInterval = 'auto',
           min = 1e4, max = 1e8) %>%
  hc_tooltip(headerFormat = '<span style="font-size: 10px">{point.key:,.0f} bp</span><br/>') %>%
  hc_chart(zoomType = "xy") %>%
  hc_plotOptions(line = list(marker = list(enabled = F)))

Compartments

Next we’ll visualize the first eigenvector (i.e., “compartment score”)

imageList <- list.files("~/Documents/HGEN_663/extra/lec12", pattern= "saddleplot.png", full.names=T)
include_graphics(imageList,dpi = 600)


Insulation

For the simplest comparison we can just count the number of shared boundaries

bdrs <- ins[c('ESC', 'NPC')] %>%
  lapply(function(x) {
    na.omit(x) %>%
      dplyr::filter(boundary_strength_100000 > .5) %>%
      mutate(start = start + 1) %>%
      makeGRangesFromDataFrame()
  })

Reduce(c, bdrs) %>% 
  unique() %>%
  {lapply(bdrs, function(x) overlapsAny(., x))} %>%
  bind_cols() %>%
  euler() %>%
  plot(quantities = T)


Loops

Called loops

These are loops called separately in each sample. There are specific classes in R that handles paired ranges, one of which is GInteractions

lps <- dots[c('ESC', 'NPC')] %>%
  lapply(function(x) {
    list(x[,1:3], x[,4:6]) %>%
      lapply(function(y) {
        y %>%
          `colnames<-`(c('chr', 'start', 'end')) %>%
          mutate(start = start + 1) %>%
          makeGRangesFromDataFrame()
      }) %>%
      {GInteractions(.[[1]], .[[2]], mode = 'reverse')}
  })

ESC

lps$ESC 
## ReverseStrictGInteractions object with 3738 interactions and 0 metadata columns:
##          seqnames1             ranges1     seqnames2             ranges2
##              <Rle>           <IRanges>         <Rle>           <IRanges>
##      [1]      chr1     4750001-4775000 ---      chr1     4500001-4525000
##      [2]      chr1     5000001-5025000 ---      chr1     4475001-4500000
##      [3]      chr1     5900001-5925000 ---      chr1     4475001-4500000
##      [4]      chr1     5900001-5925000 ---      chr1     4750001-4775000
##      [5]      chr1     5900001-5925000 ---      chr1     4900001-4925000
##      ...       ...                 ... ...       ...                 ...
##   [3734]      chrX 167800001-167825000 ---      chrX 167300001-167325000
##   [3735]      chrX 167800001-167825000 ---      chrX 167475001-167500000
##   [3736]      chrX 168925001-168950000 ---      chrX 168400001-168425000
##   [3737]      chrX 168925001-168950000 ---      chrX 168700001-168725000
##   [3738]      chrX 169275001-169300000 ---      chrX 168925001-168950000
##   -------
##   regions: 5537 ranges and 0 metadata columns
##   seqinfo: 20 sequences from an unspecified genome; no seqlengths

NPC

lps$NPC
## ReverseStrictGInteractions object with 4394 interactions and 0 metadata columns:
##          seqnames1             ranges1     seqnames2             ranges2
##              <Rle>           <IRanges>         <Rle>           <IRanges>
##      [1]      chr1     4750001-4775000 ---      chr1     4500001-4525000
##      [2]      chr1     5900001-5925000 ---      chr1     4750001-4775000
##      [3]      chr1     5900001-5925000 ---      chr1     5100001-5125000
##      [4]      chr1     6125001-6150000 ---      chr1     5100001-5125000
##      [5]      chr1     6125001-6150000 ---      chr1     5900001-5925000
##      ...       ...                 ... ...       ...                 ...
##   [4390]      chrX 167225001-167250000 ---      chrX 166650001-166675000
##   [4391]      chrX 167200001-167225000 ---      chrX 166800001-166825000
##   [4392]      chrX 168925001-168950000 ---      chrX 167275001-167300000
##   [4393]      chrX 168925001-168950000 ---      chrX 168650001-168675000
##   [4394]      chrX 169775001-169800000 ---      chrX 169325001-169350000
##   -------
##   regions: 6117 ranges and 0 metadata columns
##   seqinfo: 20 sequences from an unspecified genome; no seqlengths

Loop pile-up

imageList <- list.files("~/Documents/HGEN_663/extra/lec12", pattern= "pileup.png", full.names=T)
include_graphics(imageList,dpi = 600)

Differential loop calling

diff_loop <- fread("output.pareidolia.tsv")
diff_loop %>% reactable()